Load Data

NOTE: please knit the calculations.Rmd notebook first to prepare the data used for plotting.

# load marine literature data
marine <-
  readxl::read_excel(file.path("data", "literature.xlsx"), sheet = "marine_data") %>% 
    mutate(type = "marine", info = "marine")

# load terrestrial literature data
terrestrial <- 
  readxl::read_excel(file.path("data", "literature.xlsx"), sheet = "terrestrial_data") %>% 
    mutate(type = "terrestrial", info = reference)

# load lab literature data
lab <-readxl::read_excel(file.path("data", "literature.xlsx"), sheet = "lab_data")

# load experiments info
experiments <- readxl::read_excel(file.path("data", "experiments.xlsx"))

# load strains info
strains <- readxl::read_excel(file.path("data", "strains.xlsx")) 

# load isotope data
iso_data_w_t0 <- read_rds(file.path("cache", "iso_data_w_t0.rds"))

# fractionationc coupling
coupling_data <- read_rds(file.path("cache", "coupling_data.rds"))

Table 1: \(\epsilon^{18}O / \epsilon^{15}N\)

# prepare data
table_data <- 
  coupling_data %>% 
  left_join(strains, by = c("strain_id")) %>% 
  arrange(name, medium) %>% 
  transmute(
    Species = name,
    `Reductase genes` = nitrate_reductases,
    Medium = medium,
    `18eps / 15eps` = sprintf("%.2f +/- %.2f", ON_ratio, ON_ratio_se),
    n = n_datapoints
  )

# save table
table_data %>% export_to_excel(file = file.path("tables", "table_1.xlsx"))
table_data

Figure 2: Environmental Data

# prepare data
p2_data <- bind_rows(marine, terrestrial) %>% 
  group_by(info) %>% mutate(n_datapoints = n()) %>% ungroup() %>% 
  mutate(
    info = case_when(
      type == "marine" ~ sprintf("combined marine (%d points)\n(from %d publications)", n_datapoints, length(unique(marine$reference))),
      TRUE ~ sprintf("%s (%d points)\n(%s)", environment, n_datapoints, reference) 
    ) %>% factor() %>% fct_inorder()
  )

# define slopes
x_range <- c(0, 60)
y_offset <- -1
slope_ribbons <- 
  tibble(
    intercept = y_offset,
    slope = rep(c(1, 0.5), each = 2),
    line = sprintf("%.1f", slope),
    ribbon_width = rep(c(5, 3.5), each = 2),
    x = c(x_range, x_range),
    ymin = intercept + slope * x_range - ribbon_width,
    ymax = intercept + slope * x_range + ribbon_width
  )
slope_lines <- 
  tibble(
    x = 5 + c(x_range * 0.55, x_range * 0.8),
    slope = rep(c(1, 0.5), each = 2),
    line = sprintf("%.1f", slope),
    y = y_offset + slope * x
  )
slope_label <- latex2exp::TeX("$\\frac{^{18}\\epsilon}{^{15}\\epsilon}$ ratio")

# generate plot
p2 <- p2_data %>%
  ggplot() +
  # slope ribbons
  geom_ribbon(
    data = slope_ribbons,
    mapping = aes(x = x, ymin = ymin, ymax = ymax, fill = line),
    alpha = 0.2,
    show.legend = TRUE
  ) +
  # slopes
  geom_line(data = slope_lines, mapping = aes(x, y, linetype = line)) +
  # arrows
  geom_line(data = slope_lines,
    mapping = aes(x, y, group = line),
    linetype = 0, arrow = arrow(length = unit(0.03, "npc"), type = "closed")
  ) + 
  # data points
  geom_point(
    data = function(df) filter(df, type == "marine"), 
    mapping = aes(d15N, d18O, color = info, shape = info),
    size = 3, alpha = 0.3
  ) +
  geom_point(
    data = function(df) filter(df, type == "terrestrial"),
    mapping = aes(d15N, d18O, color = info, shape = info),
    size = 3
  ) +
  coord_cartesian(xlim = c(0, 60), ylim = c(0, 40)) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_color_manual(values = c("black", cb_palette[c(7,2:6)]), drop = FALSE) +
  scale_shape_manual(values = c(22, rep(19, 4), rep(17, 2)), drop = FALSE) +
  scale_fill_manual(values = c("black", "grey")) +
  scale_linetype_manual(values = c(2, 1)) +
  theme_figure(text_size = 14) +
  theme(legend.text = element_text(margin = margin(t = 3, b = 3))) +
  guides(color=guide_legend(override.aes=list(fill=NA))) +
  # labels
  labs(
    x = latex2exp::TeX("$\\delta^{15}N$ of $NO_3^-$ vs Air"),
    y = latex2exp::TeX("$\\delta^{18}O$ of $NO_3^-$ vs VSMOW"),
    shape = "datasets", color = "datasets", 
    linetype = slope_label, fill = slope_label
  )

# full plot
p2

Figure 3: PA14 WT & mutants

# prepare data frame for plotting
p3_data <- 
  iso_data_w_t0 %>%
  left_join(experiments, by = c("exp_id")) %>% 
  left_join(strains, by = c("strain_id")) %>% 
  filter(strain_id %in% c("PA14", "napKO", "narKO"), enriched_water != "yes") %>% 
  mutate(
    name = str_remove(name, "P. aeruginosa"),
    name_medium = sprintf("%s (%s)", name, medium),
    panel = factor(strain_id, levels = c("napKO", "PA14", "narKO")) %>% 
      fct_recode(
        "Nar only ($\\Delta{}napA$)" = "napKO",
        "Both (PA14 wild type)" = "PA14",
        "Nap only ($\\Delta{}narG$)" = "narKO"
      )
  )

# generate plot
p3 <- p3_data %>% 
  ggplot() +
  aes(x = D_d15N, y = D_d18O, color = name_medium, shape = name_medium) +
  # 0.5 and 1.0 reference lines
  geom_abline(
    data = tibble(
      icept = 0, slope = c(1, 0.5), 
      line = sprintf("%.1f", slope) %>% factor() %>% fct_inorder()
    ),
    mapping = aes(
      x = NULL, y = NULL, color = NULL, shape = NULL,
      intercept = icept, slope = slope, linetype = line
    )
  ) +
  # data
  geom_point(size = 4, alpha = 0.9) +
  # plot styling
  facet_wrap(~panel, nrow = 1, labeller = latex_labeller) +
  scale_shape_manual(values = c(15, 0, 17, 2, 16)) +
  scale_colour_manual(
    values = c("#660099", "#660099", "#0066CC", "#0066CC","#CC0000")
  ) +
  theme_figure(text_size = 16) +
  labs(
     x = latex2exp::TeX("$\\Delta \\delta^{15}N$ of  $NO_3^-$"),
     y = latex2exp::TeX("$\\Delta \\delta^{18}O$ of  $NO_3^-$"),
     linetype = latex2exp::TeX("$\\frac{^{18}\\epsilon}{^{15}\\epsilon}$ ratio"),
     shape = "experiment",
     color = "experiment"
  ) + 
  guides(color = guide_legend(override.aes = list(linetype = 0)))

# full plot without legend
p3 + theme(legend.position = "none")

Figure 4: \(\epsilon^{18}O / \epsilon^{15}N\)

Data preparation

# combine data for plotting
p4_data_all <-
  # summarize data from this study and the literature
  bind_rows(
    lab %>% mutate(source = reference),
    coupling_data %>% mutate(source = "this study")
  ) %>% 
  select(strain_id, ON_ratio, ON_ratio_se, source) %>% 
  # exclude strains with combined nap/nar use
  filter(strain_id != "PA14")

# references
sources <- tibble(
  source = p4_data_all$source %>% unique(),
  symbol = c("\u00a7", "\u2020", "#", "*")
)

# prepare data for plotting
p4_data <- 
  p4_data_all %>% left_join(sources, by = "source") %>% 
  # calculate means and range for each strain
  group_by(strain_id) %>%
  summarize(
    sources = paste(unique(symbol), collapse = ","),
    ON_ratio_mean = mean(ON_ratio),
    ON_ratio_min = ifelse(n() > 1, min(ON_ratio), ON_ratio - 2 * ON_ratio_se),
    ON_ratio_max = ifelse(n() > 1, max(ON_ratio), ON_ratio + 2 * ON_ratio_se),
    .groups = "drop"
  ) %>%
  # add strain information in
  left_join(strains, by = "strain_id") %>% 
  # arrange in specific order for the trees
  mutate(
    strain_id = factor(
      strain_id, 
      levels = rev(c(
        "TA", "AA", "PD", "PC", "napKO", "BB", "BV", # nar
        "DD", "SG", "RS", "narKO", "SL" # nap
      ))
    )
  ) %>% 
  arrange(strain_id) %>% 
  # include italic and symbols for sources
  mutate(
    name = sprintf("$\\textit{%s}^{%s}$", name, sources) %>% 
      str_replace("\U0394(.*)", "}\\\\,\\\\Delta{}{\\1") %>% 
      factor() %>% fct_inorder()
  )

Isotope data plots

# generate main plot
p4 <- p4_data %>% 
  ggplot() +
  aes(name, ON_ratio_mean, ymin = ON_ratio_min, ymax = ON_ratio_max, color = nitrate_reductases, shape = nitrate_reductases) +
  geom_errorbar(width = 0, show.legend = FALSE) +
  # 0.5 and 1.0 reference lines
  geom_hline(
    data = tibble(
      icept = c(1.0, 0.5), 
      line = sprintf("%.1f", icept) %>% factor() %>% fct_inorder()
    ),
    mapping = aes(
      x = NULL, y = NULL, color = NULL, shape = NULL,
      yintercept = icept, linetype = line
    )
  ) +
  # data
  geom_point(size = 4.5, stat = "unique") +
  # plot styling
  scale_shape_manual(values = c(15, 17, 16)) +
  scale_color_manual(values = c("#660099", "#CC0000", "#0066CC")) +
  scale_x_discrete(
    labels = function(x) latex_labeller(x) %>% unlist() %>% unname()
  ) +
  scale_y_continuous(breaks = c(5:10) * 0.1, expand = c(0, 0)) +
  expand_limits(y = c(0.40, 1.1)) +
  theme_figure(text_size = 16) +
  theme(axis.text.y = element_text(hjust = 0, vjust = 0.5)) +
  labs(
    x = NULL, y = latex2exp::TeX("$^{18}\\epsilon :^{15}\\epsilon$"),
    linetype = latex2exp::TeX("$\\frac{^{18}\\epsilon}{^{15}\\epsilon}$"),
    color = "Reductase gene(s)", shape = "Reductase gene(s)"
  ) +
  guides(color = guide_legend(override.aes = list(linetype = 0))) +
  coord_flip()
  
# plot without legend
p4 + theme(legend.position = "none")

# generate top plot
p4_top <-
  p4_data_all %>% 
  left_join(sources, by = "source") %>% 
  left_join(strains, by = "strain_id") %>% 
  mutate(nitrate_reductases = factor(nitrate_reductases)) %>%
  filter(nitrate_reductases %in% c("napA", "narG")) %>%
  ggplot() + 
  aes(x = ON_ratio, after_stat(scaled), fill = nitrate_reductases) + 
  geom_density(trim = FALSE, alpha = 1.0) + 
  scale_x_continuous(breaks = c(5:10) * 0.1, expand = c(0, 0) ) +
  scale_y_continuous(labels = function(x) sprintf("%.0f%%", 100*x), expand = c(0, 0)) +
  scale_fill_manual(values = c("#660099", "#CC0000", "#0066CC"), drop = FALSE, guide = "none") +
  expand_limits(x = c(0.3, 1.2)) +
  coord_cartesian(x = c(0.40, 1.1), y = c(0, 1.05)) +
  theme_figure(text_size = 16, grid = FALSE) +
  theme(axis.ticks.y = element_blank(), axis.text.y = element_blank()) +
  labs(y = "distribution", x = latex2exp::TeX("$^{18}\\epsilon :^{15}\\epsilon$")) 

p4_top + theme(legend.position = "none")

Phylogenetic tree

library(treeio)
library(ggtree)
tree_nar <- treeio::read.nexus("data/nar_tree.nex.trprobs") %>% 
  phangorn::maxCladeCred() %>% 
  drop.tip("Ecoli_TorZ") # E.coli is the outgroup
branch.length <- 5
tree_nar$root_edge <- 2
p_nar <- 
  tree_nar %>% 
  ggtree() +
  #geom_tiplab() + 
  geom_rootedge(rootedge = 0.3)
p_nar <- p_nar %>% flip(13, 11) %>% flip(5, 12)

tree_nap <- treeio::read.nexus("data/nap_tree.nex.trprobs") %>% 
  phangorn::maxCladeCred() %>% 
  drop.tip("Ecoli_TorZ") # E.coli is the outgroup
tree_nap$root_edge <- 2
p_nap <- 
  tree_nap %>% 
  ggtree() +
  #geom_tiplab() + 
  geom_rootedge(rootedge = 0.3)
p_nap <- p_nap %>% flip(1, 7) %>% flip(5, 9)

# combine tree
p_tree <- 
  cowplot::plot_grid(
    p_nar + labs(y = "Nar Reductase") + 
      theme(text = element_text(size = 16),
            plot.margin = margin(b = -5)), 
    p_nap + labs(y = "Nap Reductase") + 
      theme(text = element_text(size = 16),
            plot.margin = margin(t = -5)), 
    ncol = 1L,
    rel_heights = c(7, 5)
  )
p_tree

Full plot

library(cowplot)

# left plot
p_left <- plot_grid(
  ggdraw(),
  p_tree + theme(plot.margin = margin(b = 40, t = 4)), 
  ncol = 1L, rel_heights = c(0.2, 0.8)
)

# right plot
p_right <- 
  plot_grid(
    p4_top + 
      geom_point(
        mapping = aes(y = ON_ratio, shape = nitrate_reductases, color = nitrate_reductases),
        size = 0, alpha = 0
      ) +
      scale_color_manual(values = c("#660099", "#CC0000", "#0066CC"), drop = FALSE) +
      scale_shape_manual(values = c(15, 17, 16), drop = FALSE) +
      guides(color = guide_legend(override.aes = list(
        linetype = 0, alpha = 1, size = 4.5, shape = c(15, 16, 17)
      ))) +
      labs(color = "Reductase\ngene(s)", shape = "Reductase\ngene(s)") +
      theme(
        legend.position = "left", 
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks = element_blank()
      ), 
    p4 + theme(legend.position = "none"),
    ncol = 1L, align = "v", axis = "lr", rel_heights = c(0.2, 0.8)
  )

# combined plot
plot_grid(p_left, p_right, nrow = 1, rel_widths = c(0.25, 0.75))